Preparing bulk RNAseq figures for publication.
Load libraries
library(tidyverse)
library("AnnotationDbi")
library("org.Hs.eg.db")
library(DESeq2)
library(purrr)
library(pheatmap)
library(viridis)
# library(clusterProfiler)
library(GO.db)
library(TCseq)
library(clusterProfiler)
library(patchwork)
library(reshape2)
set.seed(42)
dds <- readRDS("../data/dds.rds")
# res <- readRDS('../data/res.rds')
###Euclidean distance
rld <- rlog(dds, blind = TRUE)
sampleDist <- rld %>% assay() %>% t() %>% dist
sampleDistMatrix <- as.matrix(sampleDist)
rownames(sampleDistMatrix) <- paste(colnames(rld), " - ", rld$timepoint)
colnames(sampleDistMatrix) <- NULL
pheatmap(sampleDistMatrix, clustering_distance_rows = sampleDist, clustering_distance_cols = sampleDist,
col = rev(plasma(255)), treeheight_col = 0, treeheight_row = 100, border_color = NA)
Figure 2.1: Sample distance matrix (heatmap) using rlog transformed values
rv <- rowVars(assay(rld))
rv <- order(rv, decreasing = TRUE)[seq_len(min(500, length(rv)))]
pca <- prcomp(t(assay(rld)[rv, ]))
percentVar <- pca$sdev^2/sum(pca$sdev^2)
percentVar <- round(percentVar * 100, digits = 2)
intgroup.df <- as.data.frame(colData(rld)[, "timepoint", drop = FALSE])
d <- data.frame(PC1 = pca$x[, 1], PC2 = pca$x[, 2], PC3 = pca$x[, 3], intgroup.df,
name = colnames(rld))
# Vector for time point colors
cc <- c("#F7E51D", "#38AF7A", "#34618D", "#41235A")
names(cc) <- c("NPC", "d5", "d25", "d45")
# Vector of colors for each sample
m <- cc[d$timepoint]
ggplot(d, aes(PC1, PC2, color = timepoint)) + geom_point(size = 10) + xlab(paste0("PC1: ",
percentVar[1], "% variance")) + theme_gray(base_size = 24) + scale_color_manual(values = m) +
ylab(paste0("PC2: ", percentVar[2], "% variance"))
genes <- c("MKI67", "SOX2", "HES1", "HES5", "NES", "PAX6", "TUBB3", "STMN1", "FAT3",
"DCX", "RBFOX3", "ZBTB38", "MAPT", "CAMK2A", "CAMK4", "NRXN1", "NCAM1", "SNAP25",
"DLG4", "SYP", "SYN1", "SYT1", "ANK3", "SPTBN4", "SCN2A", "GRIN1", "GRIN2B",
"GRIA1", "GRIA2", "GABRA3", "GABRB2", "ARC", "NPAS4", "FOS", "SLC17A7", "GAD1",
"FOXG1")
ncounts1 <- dds[dds@rowRanges@elementMetadata@listData[["gene_symbol"]] %in% genes]
ncounts <- counts(ncounts1, normalize = T)
rownames(ncounts) <- ncounts1@rowRanges@elementMetadata@listData[["gene_symbol"]]
colnames(ncounts) <- c("NPC_1", "d5_1", "d25_1", "d45_1", "NPC_2", "NPC_3", "d5_2",
"d25_2", "d45_2", "d5_3", "d25_3", "d45_3")
ncounts <- ncounts[, c(1, 5, 6, 2, 7, 10, 3, 8, 11, 4, 9, 12)]
ncounts <- ncounts[match(genes, rownames(ncounts)), ]
my_zscale <- function(x) {
M <- rowMeans(x, na.rm = TRUE)
nsamples <- ncol(x)
DF <- nsamples - 1L
IsNA <- is.na(x)
if (any(IsNA)) {
mode(IsNA) <- "integer"
DF <- DF - rowSums(IsNA)
DF[DF == 0L] <- 1L
}
x <- x - M
V <- rowSums(x^2L, na.rm = TRUE)/DF
x <- x/sqrt(V + 0.01)
out <- melt(x)
out
}
zcounts <- my_zscale(ncounts)
lims <- c(min(zcounts$value), max(zcounts$value)) %>% max()
ggplot(data = zcounts, aes(x = Var2, y = Var1, fill = value)) + geom_tile() + scale_fill_viridis_c(option = "viridis",
limits = c(-lims, lims), breaks = c(-2, 0, 2)) + scale_y_discrete(limits = rev(levels(zcounts$Var1)),
position = "right") + geom_hline(yintercept = c(3.5, 6.5, 12.5, 15.5, 20.5, 27.5,
31.5), col = "white", size = 2) + theme_minimal(base_size = 20) + labs(fill = "Z-Score",
x = "", y = "") + theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
axis.text.y = element_text(face = "italic"), legend.position = "bottom", legend.key.width = unit(2,
"cm"))
rm(list = setdiff(ls(), c("dds", "TPM", "TC_plots", "TC_res", "GO_smooth_line_plot",
"kk2foldchange", "my_GO_plot", "my_zscale")))
# change file_path to directory of featureCounts
tbl <- list.files(path = "../raw_data/featureCounts/", pattern = "*.tsv", full.names = T)
TPM <- map(tbl, read.table, header = T, comment.char = "") %>% map(15)
names(TPM) <- c("mat1", "mat10", "mat11", "mat12", "mat2", "mat3", "mat4", "mat5",
"mat6", "mat7", "mat8", "mat9")
TPM <- TPM %>% as.data.frame()
rownames(TPM) <- read.table(tbl[1], header = T, comment.char = "")[, 4] %>% substr(1,
15)
TPM$gene_name <- read.table(tbl[1], header = T, comment.char = "")$name
TPM$ENSID <- rownames(TPM)
mat <- TPM[, 1:12]
gene_symbol <- rownames(mat)
names(gene_symbol) <- TPM$gene_name
GO_smooth_line_plot <- function(data) {
rowm <- rowMeans(df)
names(rowm) <- c("NPC", "d5", "d25", "d45", "NPC", "NPC", "d5", "d25", "d45",
"d5", "d25", "d45")
rowm <- as.data.frame(rowm)
rowm$timepoint <- c("NPC", "d5", "d25", "d45", "NPC", "NPC", "d5", "d25", "d45",
"d5", "d25", "d45")
rowm$timepoint_sample <- c("NPC_1", "d5_1", "d25_1", "d45_1", "NPC_2", "NPC_3",
"d5_2", "d25_2", "d45_2", "d5_3", "d25_3", "d45_3")
p <- mutate(rowm, timepoint = factor(timepoint, ordered = T, c("NPC", "d5", "d25",
"d45")))
y <- aggregate(p[, 1], list(p$timepoint), mean)
se <- aggregate(p[, 1], list(p$timepoint), sd)
df <- data.frame(timepoint = ordered(c("NPC", "d5", "d25", "d45"), levels = c("NPC",
"d5", "d25", "d45")), TPM = y$x, se = se$x)
print(ggplot(NULL, aes(x = as.numeric(timepoint))) + geom_smooth(data = df, aes(as.numeric(timepoint),
TPM, ymin = TPM - se, ymax = TPM + se), stat = "identity", col = "red") +
scale_x_discrete(limits = c("NPC", "d5", "d25", "d45")) + scale_y_log10() +
theme_classic(base_size = 24) + theme(legend.title = element_blank(), axis.title.x = element_blank(),
axis.text = element_text(size = 30), axis.title.y = element_text(size = 30)) +
ylab("TPM") + coord_cartesian(xlim = c(1, 4.1), expand = FALSE))
}
go_id = GOID(GOTERM[Term(GOTERM) == "negative regulation of neuron death"])
allegs <- get(go_id, org.Hs.egGO2ALLEGS)
genes <- unlist(mget(allegs, org.Hs.egSYMBOL))
genes <- unique(genes)
Number of unique genes in GO:1901215: 208
df <- t(mat[gene_symbol[genes], ])
colnames(df) <- c(genes)
df <- as.data.frame(df)
all_na <- function(x) any(!is.na(x))
df <- df %>% select_if(all_na)
Number of genes from GO:1901215 in dataset: 200
GO_table <- function(genes) {
df <- mat[gene_symbol[genes], ]
rownames(df) <- c(genes)
df <- na.omit(df)
df <- as.data.frame(df)
NPC <- rowMeans(df[, c(1, 5, 6)])
d5 <- rowMeans(df[, c(2, 7, 10)])
d25 <- rowMeans(df[, c(3, 8, 11)])
d45 <- rowMeans(df[, c(4, 9, 12)])
df_mean <- data.frame(NPC = NPC, d5 = d5, d25 = d25, d45 = d45)
df_mean
}
GO_table(genes) %>% write.csv(file = "../data/genes_negative_regulation_neuron_death.csv")
# %>% DT::datatable(extensions = 'Buttons', options = list(dom = 'Blfrtip',
# buttons = c('csv', 'excel'), lengthMenu = list(c(10,25,50,-1),
# c(10,25,50,'All')) ), width = '300px')
GO_smooth_line_plot(df)
go_id = GOID(GOTERM[Term(GOTERM) == "execution phase of apoptosis"])
allegs <- get(go_id, org.Hs.egGO2ALLEGS)
genes <- unlist(mget(allegs, org.Hs.egSYMBOL))
genes <- unique(genes)
Number of unique genes in GO:0097194: 88
df <- t(mat[gene_symbol[genes], ])
colnames(df) <- c(genes)
df <- as.data.frame(df)
all_na <- function(x) any(!is.na(x))
df <- df %>% select_if(all_na)
Number of genes from GO:0097194 in dataset: 87
GO_table(genes) %>% write.csv(file = "../data/genes_execution_phase_of_apoptosis.csv")
# %>% DT::datatable(extensions = 'Buttons', options = list(dom = 'Blfrtip',
# buttons = c('csv', 'excel'), lengthMenu = list(c(10,25,50,-1),
# c(10,25,50,'All')) ), width = '300px')
GO_smooth_line_plot(df)
genes <- c("BAX", "BAK1", "BCL2", "BCL2L1", "BCL2L2", "MCL1", "BID", "BAD", "HRK",
"BOK", "BCL2L11", "BBC3", "PMAIP1", "BMF")
ncounts1 <- dds[dds@rowRanges@elementMetadata@listData[["gene_symbol"]] %in% genes]
ncounts <- counts(ncounts1, normalize = T)
rownames(ncounts) <- ncounts1@rowRanges@elementMetadata@listData[["gene_symbol"]]
colnames(ncounts) <- c("NPC_1", "d5_1", "d25_1", "d45_1", "NPC_2", "NPC_3", "d5_2",
"d25_2", "d45_2", "d5_3", "d25_3", "d45_3")
ncounts <- ncounts[, c(1, 5, 6, 2, 7, 10, 3, 8, 11, 4, 9, 12)]
ncounts <- ncounts[match(genes, rownames(ncounts)), ]
ncounts <- ncounts[match(genes, rownames(ncounts)), ]
NPC <- rowMeans(ncounts[, 1:3])
d5 <- rowMeans(ncounts[, 4:6])
d25 <- rowMeans(ncounts[, 7:9])
d45 <- rowMeans(ncounts[, 10:12])
av_ncounts <- cbind(NPC, d5, d25, d45)
zcounts <- my_zscale(av_ncounts)
ggplot(data = zcounts, aes(x = Var2, y = Var1, fill = value)) + geom_tile() + scale_fill_viridis_c(option = "viridis",
limits = c(-1.5, 1.5), breaks = c(-1.5, 0, 1.5)) + scale_y_discrete(limits = rev(levels(zcounts$Var1))) +
geom_hline(yintercept = c(12.5, 8.5), col = "white", size = 3) + theme_minimal(base_size = 30) +
labs(fill = "Z-Score", x = "", y = "") + theme(axis.text.x = element_text(angle = 0,
vjust = 0.5), axis.text.y = element_text(face = "italic"), legend.title = element_text(size = 24),
legend.text = element_text(size = 20)) + scale_x_discrete(position = "top")
df <- t(TPM[gene_symbol[c("BAX", "BCL2")], 1:12])
colnames(df) <- c("BAX", "BCL2")
df <- as.data.frame(df)
df$timepoint_sample <- c("NPC_1", "d5_1", "d25_1", "d45_1", "NPC_2", "NPC_3", "d5_2",
"d25_2", "d45_2", "d5_3", "d25_3", "d45_3")
df$timepoint <- c("NPC", "d5", "d25", "d45", "NPC", "NPC", "d5", "d25", "d45", "d5",
"d25", "d45")
p <- mutate(df, timepoint = factor(timepoint, ordered = T, c("NPC", "d5", "d25",
"d45")))
BAX <- aggregate(p[, 1], list(p$timepoint), mean)
BAX_se <- aggregate(p[, 1], list(p$timepoint), sd)
BCL2 <- aggregate(p[, 2], list(p$timepoint), mean)
BCL2_se <- aggregate(p[, 2], list(p$timepoint), sd)
df2 <- data.frame(timepoint = ordered(c("NPC", "d5", "d25", "d45"), levels = c("NPC",
"d5", "d25", "d45")), BAX = BAX$x, BAX_se = BAX_se$x, BCL2 = BCL2$x, BCL2_se = BCL2_se$x)
ggplot(NULL, aes(x = as.numeric(timepoint))) + geom_smooth(data = df2, aes(as.numeric(timepoint),
BAX, ymin = BAX - BAX_se, ymax = BAX + BAX_se, color = "BAX", fill = "BAX"),
stat = "identity") + geom_smooth(data = df2, aes(as.numeric(timepoint), BCL2,
ymin = BCL2 - BCL2_se, ymax = BCL2 + BCL2_se, color = "BCL2", fill = "BCL2"),
stat = "identity") + scale_x_discrete(limits = c("NPC", "d5", "d25", "d45")) +
scale_y_log10() + theme_classic(base_size = 28) + theme(legend.title = element_blank(),
axis.title.x = element_blank(), legend.text = element_text(face = "italic"),
legend.position = c(0.85, 0.9)) + ylab("TPM") + coord_cartesian(xlim = c(1, 4.1),
expand = FALSE) + scale_colour_manual(name = "Gene", values = c(BAX = "red",
BCL2 = "#5FB568"), labels = c("BAX", "BCL2"), aesthetics = c("color", "fill"))
rm(list = setdiff(ls(), c("dds", "TPM", "TPM_long", "gene_symbol", "GO_smooth_line_plot",
"TC_plots", "TC_res", "kk2foldchange", "my_GO_plot", "my_zscale")))
design <- as.data.frame(colData(dds))
design$sampleid <- rownames(design)
design$group <- design$batch
# design <- design[,c(1,4,5)]
design$timepoint2 <- c("NPC-1", "d5-1", "d25-1", "d45-1", "NPC-2", "NPC-3", "d5-2",
"d25-2", "d45-2", "d5-3", "d25-3", "d45-3")
design$timepoint2 <- factor(design$timepoint2, levels = c("NPC-1", "NPC-2", "NPC-3",
"d5-1", "d5-2", "d5-3", "d25-1", "d25-2", "d25-3", "d45-1", "d45-2", "d45-3"))
design <- design[, c(1, 6, 4, 5)]
tbl <- list.files(path = "../raw_data/featureCounts/", pattern = "*.tsv", full.names = T)
l <- read.table(tbl[1], header = T, comment.char = "")
l <- l[, c(1, 2, 3, 4, 7)]
l$gene_id <- l$gene_id %>% substr(1, 15)
counts <- counts(dds)
id <- dds@rowRanges@elementMetadata$gene_symbol
id <- as.data.frame(id)
id$ENS <- rownames(dds)
x <- merge(id, l, by.x = 2, by.y = 4, sort = F)
features <- data.frame(id = x$ENS, chr = x$X.chrom, start = x$chromStart, end = x$chromEnd)
tca <- TCA(design, counts = counts, genomicFeature = features)
tca <- DBanalysis(tca, filter.type = "raw", filter.value = 10, samplePassfilter = 2)
tca <- timecourseTable(tca, value = "expression", norm.method = "rpkm", filter = TRUE)
t <- tcTable(tca)
tca <- timeclust(tca, algo = "cm", k = 12, standardize = TRUE)
TC_plots <- timeclustplot(tca, value = "z-score(RPKM)", cols = 3, membership.color = plasma(200))
cluster <- tca@clusterRes@cluster
df <- as.data.frame(cluster)
ens.str <- rownames(df)
df$symbol <- mapIds(org.Hs.eg.db, keys = ens.str, column = "SYMBOL", keytype = "ENSEMBL",
multiVals = "first")
## 'select()' returned 1:many mapping between keys and columns
for (i in seq_along(df$cluster)) {
df$membership_percentage[i] <- tca@clusterRes@membership[i, df$cluster[i]]
}
TC_res <- split(df, as.factor(df$cluster))
for (i in 1:length(TC_res)) {
ens.str <- rownames(TC_res[[i]])
TC_res[[i]]$ENTREZ <- mapIds(org.Hs.eg.db, keys = ens.str, column = "ENTREZID",
keytype = "ENSEMBL", multiVals = "first")
}
df <- as.data.frame(TC_res[[2]])
sigGenes <- df$ENTREZ
sigGenes <- na.exclude(sigGenes)
TC_plots[[2]] + theme_classic(base_size = 24) + scale_color_viridis(breaks = c(0.2,
0.4, 0.6, 0.8), option = "C")
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
# Function for calculating the fold changes from the GeneRatio/BgRatio values
# from the output of enrichGO
kk2foldchange <- function(kk) {
GO <- as.data.frame(kk)
GeneRatio <- strsplit(GO$GeneRatio, "/")
GRdf <- data.frame()
for (i in seq_along(GeneRatio)) {
GRdf[i, 1] <- GeneRatio[[i]][1]
GRdf[i, 2] <- GeneRatio[[i]][2]
}
GRdf$V1 <- as.numeric(GRdf$V1)
GRdf$V2 <- as.numeric(GRdf$V2)
GO$GeneRatio2 <- GRdf$V1/GRdf$V2
BgRatio <- strsplit(GO$BgRatio, "/")
BgRdf <- data.frame()
for (i in seq_along(BgRatio)) {
BgRdf[i, 1] <- BgRatio[[i]][1]
BgRdf[i, 2] <- BgRatio[[i]][2]
}
BgRdf$V1 <- as.numeric(BgRdf$V1)
BgRdf$V2 <- as.numeric(BgRdf$V2)
GO$BgRatio2 <- BgRdf$V1/BgRdf$V2
GO$foldchange <- GO$GeneRatio2/GO$BgRatio2
return(GO)
}
kk <- enrichGO(gene = sigGenes, ont = "BP", OrgDb = "org.Hs.eg.db")
GO <- kk2foldchange(kk)
if (length(GO$ID) > 30) {
GOsub <- GO[1:30, ]
} else {
GOsub <- GO[1:length(GO$ID), ]
}
GO_ordered <- GOsub[order(GOsub$foldchange), ]
GO_ordered$foldchange <- as.numeric(GO_ordered$foldchange)
GO_ordered$Description <- factor(GO_ordered$Description, levels = GO_ordered$Description[order(GO_ordered$foldchange,
decreasing = FALSE)])
GO_ordered <- GO_ordered[c(24, 25, 27, 28, 30), ]
my_GO_plot <- function() {
ggplot(GO_ordered, aes(foldchange, reorder(stringr::str_wrap(Description, 40),
foldchange))) + geom_col(aes(fill = p.adjust)) + theme_minimal(base_size = 24) +
xlab("fold enrichment") + ylab("") + labs(fill = "p.adjust")
}
BP <- my_GO_plot()
### GO plot - molecular function
kk <- enrichGO(gene = sigGenes, ont = "MF", OrgDb = "org.Hs.eg.db")
GO <- kk2foldchange(kk)
if (length(GO$ID) > 30) {
GOsub <- GO[1:30, ]
} else {
GOsub <- GO[]
}
GO_ordered <- GOsub[order(GOsub$foldchange), ]
GO_ordered$foldchange <- as.numeric(GO_ordered$foldchange)
GO_ordered$Description <- factor(GO_ordered$Description, levels = GO_ordered$Description[order(GO_ordered$foldchange,
decreasing = FALSE)])
GO_ordered <- GO_ordered[c(23, 26, 27, 29, 30), ]
MF <- my_GO_plot()
### GO plot - cellular component
kk <- enrichGO(gene = sigGenes, ont = "CC", OrgDb = "org.Hs.eg.db")
GO <- kk2foldchange(kk)
if (length(GO$ID) > 30) {
GOsub <- GO[1:30, ]
} else {
GOsub <- GO[]
}
GO_ordered <- GOsub[order(GOsub$foldchange), ]
GO_ordered$foldchange <- as.numeric(GO_ordered$foldchange)
GO_ordered$Description <- factor(GO_ordered$Description, levels = GO_ordered$Description[order(GO_ordered$foldchange,
decreasing = FALSE)])
GO_ordered <- GO_ordered[c(23, 24, 25, 28, 30), ]
CC <- my_GO_plot()
(c2 <- BP/MF/CC)
df <- as.data.frame(TC_res[[3]])
sigGenes <- df$ENTREZ
sigGenes <- na.exclude(sigGenes)
TC_plots[[3]] + theme_classic(base_size = 24) + scale_color_viridis(breaks = c(0.2,
0.4, 0.6, 0.8), option = "C")
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
# GO plot - biological process
kk <- enrichGO(gene = sigGenes, ont = "BP", OrgDb = "org.Hs.eg.db")
GO <- kk2foldchange(kk)
if (length(GO$ID) > 30) {
GOsub <- GO[1:30, ]
} else {
GOsub <- GO[1:length(GO$ID), ]
}
GO_ordered <- GOsub[order(GOsub$foldchange), ]
GO_ordered$foldchange <- as.numeric(GO_ordered$foldchange)
GO_ordered$Description <- factor(GO_ordered$Description, levels = GO_ordered$Description[order(GO_ordered$foldchange,
decreasing = FALSE)])
GO_ordered <- GO_ordered[c(24, 25, 28, 29, 30), ]
BP <- my_GO_plot()
# GO plot - molecular function
kk <- enrichGO(gene = sigGenes, ont = "MF", OrgDb = "org.Hs.eg.db")
GO <- kk2foldchange(kk)
if (length(GO$ID) > 30) {
GOsub <- GO[1:30, ]
} else {
GOsub <- GO[]
}
GO_ordered <- GOsub[order(GOsub$foldchange), ]
GO_ordered$foldchange <- as.numeric(GO_ordered$foldchange)
GO_ordered$Description <- factor(GO_ordered$Description, levels = GO_ordered$Description[order(GO_ordered$foldchange,
decreasing = FALSE)])
GO_ordered <- GO_ordered[c(22, 26, 27, 29, 30), ]
MF <- my_GO_plot()
# GO plot - cellular component
kk <- enrichGO(gene = sigGenes, ont = "CC", OrgDb = "org.Hs.eg.db")
GO <- kk2foldchange(kk)
if (length(GO$ID) > 30) {
GOsub <- GO[1:30, ]
} else {
GOsub <- GO[]
}
GO_ordered <- GOsub[order(GOsub$foldchange), ]
GO_ordered$foldchange <- as.numeric(GO_ordered$foldchange)
GO_ordered$Description <- factor(GO_ordered$Description, levels = GO_ordered$Description[order(GO_ordered$foldchange,
decreasing = FALSE)])
GO_ordered <- GO_ordered[c(25, 26, 28, 29, 30), ]
CC <- my_GO_plot()
# combined plots
(c3 <- BP/MF/CC)
rm(list = setdiff(ls(), c("dds", "TPM", "gene_symbol", "TC_plots", "TC_res", "GO_smooth_line_plot",
"kk2foldchange", "my_GO_plot", "my_zscale")))
df <- as.data.frame(TC_res[[10]])
sigGenes <- df$ENTREZ
sigGenes <- na.exclude(sigGenes)
TC_plots[[10]] + theme_classic(base_size = 24) + scale_color_viridis(breaks = c(0.2,
0.4, 0.6, 0.8), option = "C")
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
# GO-plot - biological process
kk <- enrichGO(gene = sigGenes, ont = "BP", OrgDb = "org.Hs.eg.db")
GO <- kk2foldchange(kk)
if (length(GO$ID) > 30) {
GOsub <- GO[1:30, ]
} else {
GOsub <- GO[1:length(GO$ID), ]
}
GO_ordered <- GOsub[order(GOsub$foldchange), ]
GO_ordered$foldchange <- as.numeric(GO_ordered$foldchange)
GO_ordered$Description <- factor(GO_ordered$Description, levels = GO_ordered$Description[order(GO_ordered$foldchange,
decreasing = FALSE)])
GO_ordered <- GO_ordered[c(19, 20, 25, 27, 30), ]
BP <- my_GO_plot()
# GO plot - molecular function
kk <- enrichGO(gene = sigGenes, ont = "MF", OrgDb = "org.Hs.eg.db")
GO <- kk2foldchange(kk)
if (length(GO$ID) > 30) {
GOsub <- GO[1:30, ]
} else {
GOsub <- GO[]
}
GO_ordered <- GOsub[order(GOsub$foldchange), ]
GO_ordered$foldchange <- as.numeric(GO_ordered$foldchange)
GO_ordered$Description <- factor(GO_ordered$Description, levels = GO_ordered$Description[order(GO_ordered$foldchange,
decreasing = FALSE)])
GO_ordered <- GO_ordered[c(21, 24, 27, 29, 30), ]
MF <- my_GO_plot()
# GO plot - cellular component
kk <- enrichGO(gene = sigGenes, ont = "CC", OrgDb = "org.Hs.eg.db")
GO <- kk2foldchange(kk)
if (length(GO$ID) > 30) {
GOsub <- GO[1:30, ]
} else {
GOsub <- GO[]
}
GO_ordered <- GOsub[order(GOsub$foldchange), ]
GO_ordered$foldchange <- as.numeric(GO_ordered$foldchange)
GO_ordered$Description <- factor(GO_ordered$Description, levels = GO_ordered$Description[order(GO_ordered$foldchange,
decreasing = FALSE)])
GO_ordered <- GO_ordered[c(8, 19, 23, 24, 28), ]
CC <- my_GO_plot()
(c10 <- BP/MF/CC)
df <- as.data.frame(TC_res[[1]])
sigGenes <- df$ENTREZ
sigGenes <- na.exclude(sigGenes)
TC_plots[[1]] + theme_classic(base_size = 24) + scale_color_viridis(breaks = c(0.2,
0.4, 0.6, 0.8), option = "C")
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
kk <- enrichGO(gene = sigGenes, ont = "BP", OrgDb = "org.Hs.eg.db")
GO <- kk2foldchange(kk)
if (length(GO$ID) > 30) {
GOsub <- GO[1:30, ]
} else {
GOsub <- GO[1:length(GO$ID), ]
}
GO_ordered <- GOsub[order(GOsub$foldchange), ]
GO_ordered$foldchange <- as.numeric(GO_ordered$foldchange)
GO_ordered$Description <- factor(GO_ordered$Description, levels = GO_ordered$Description[order(GO_ordered$foldchange,
decreasing = FALSE)])
GO_ordered <- GO_ordered[c(2, 5, 10, 12, 13), ]
(c1 <- my_GO_plot())
rm(list = setdiff(ls(), c("dds", "TPM", "TPM_long", "gene_symbol", "GO_smooth_line_plot",
"TC_plots", "TC_res", "kk2foldchange", "my_GO_plot")))
df <- t(TPM[gene_symbol[c("APAF1", "XIAP")], 1:12])
colnames(df) <- c("APAF1", "XIAP")
df <- as.data.frame(df)
df$timepoint_sample <- c("NPC_1", "d5_1", "d25_1", "d45_1", "NPC_2", "NPC_3", "d5_2",
"d25_2", "d45_2", "d5_3", "d25_3", "d45_3")
df$timepoint <- c("NPC", "d5", "d25", "d45", "NPC", "NPC", "d5", "d25", "d45", "d5",
"d25", "d45")
p <- mutate(df, timepoint = factor(timepoint, ordered = T, c("NPC", "d5", "d25",
"d45")))
APAF1 <- aggregate(p[, 1], list(p$timepoint), mean)
APAF1_se <- aggregate(p[, 1], list(p$timepoint), sd)
XIAP <- aggregate(p[, 2], list(p$timepoint), mean)
XIAP_se <- aggregate(p[, 2], list(p$timepoint), sd)
df2 <- data.frame(timepoint = ordered(c("NPC", "d5", "d25", "d45"), levels = c("NPC",
"d5", "d25", "d45")), APAF1 = APAF1$x, APAF1_se = APAF1_se$x, XIAP = XIAP$x,
XIAP_se = XIAP_se$x)
ggplot(NULL, aes(x = as.numeric(timepoint))) + geom_smooth(data = df2, aes(as.numeric(timepoint),
APAF1, ymin = APAF1 - APAF1_se, ymax = APAF1 + APAF1_se, color = "APAF1", fill = "APAF1"),
stat = "identity") + geom_smooth(data = df2, aes(as.numeric(timepoint), XIAP,
ymin = XIAP - XIAP_se, ymax = XIAP + XIAP_se, color = "XIAP", fill = "XIAP"),
stat = "identity") + scale_x_discrete(limits = c("NPC", "d5", "d25", "d45")) +
scale_y_log10() + theme_classic(base_size = 28) + theme(legend.title = element_blank(),
axis.title.x = element_blank(), legend.position = c(0.25, 0.9), legend.text = element_text(face = "italic"),
legend.background = element_blank()) + ylab("TPM") + coord_cartesian(xlim = c(1,
4.1), expand = FALSE) + scale_colour_manual(name = "Gene", values = c(APAF1 = "red",
XIAP = "#5FB568"), labels = c("APAF1", "XIAP"), aesthetics = c("color", "fill"))
rm(list = setdiff(ls(), c("dds", "TPM", "TPM_long", "gene_symbol", "GO_smooth_line_plot",
"TC_plots", "TC_res", "kk2foldchange", "my_GO_plot")))
tbl <- list.files(path = "../raw_data/featureCounts/", pattern = "*.tsv", full.names = T)
counts <- map(tbl, read.table, header = T, comment.char = "") %>% map(9)
names(counts) <- c("mat1", "mat10", "mat11", "mat12", "mat2", "mat3", "mat4", "mat5",
"mat6", "mat7", "mat8", "mat9")
counts <- counts %>% as.data.frame()
rownames(counts) <- read.table(tbl[1], header = T, comment.char = "")[, 4] %>% substr(1,
15)
gene_symbol <- read.table(tbl[1], header = T, comment.char = "")[, 7] %>% as.vector()
tbl2 <- list.files(path = "../raw_data/featureCountsMS/", pattern = "*.tsv", full.names = T)
counts2 <- map(tbl2, read.table, header = T, comment.char = "") %>% map(9)
names(counts2) <- c("CL28_1", "CL_28_2", "CL_68_1", "CL_69_1", "CL_69_2")
counts2 <- counts2 %>% as.data.frame()
rownames(counts2) <- read.table(tbl2[1], header = T, comment.char = "")[, 4] %>%
substr(1, 15)
gene_symbol <- read.table(tbl2[1], header = T, comment.char = "")[, 7] %>% as.vector()
sample_info contains information about the day of harvest and the batch of the regarding samples.
sample_info <- data.frame(timepoint = c("d0", "d5", "d25", "d45", "d0", "d0", "d5",
"d25", "d45", "d5", "d25", "d45", "CL28_d28", "CL28_d28", "CL68_d28", "CL69_d28",
"CL69_d28"), batch = c("progenitors", "batch3", "batch3", "batch3", "progenitors",
"progenitors", "batch1", "batch1", "batch1", "batch2", "batch2", "batch2", "batch4",
"batch4", "batch4", "batch4", "batch4"), row.names = c(names(counts), names(counts2)))
combining datasets:
counts <- cbind(counts, counts2)
rm(counts2)
Creating the DESeqDataSet:
dds2 <- DESeqDataSetFromMatrix(countData = counts, colData = sample_info, design = ~timepoint)
featureData <- data.frame(gene_symbol = gene_symbol)
mcols(dds2) <- DataFrame(mcols(dds2), featureData)
dds2 <- DESeq(dds2)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
dds2@colData@listData$timepoint <- c("NPC", "d5", "d25", "d45", "NPC", "NPC", "d5",
"d25", "d45", "d5", "d25", "d45", "CL28_d28", "CL28_d28", "CL68_d28", "CL69_d28",
"CL69_d28")
my_levels <- c("NPC", "d5", "d25", "d45", "CL28_d28", "CL68_d28", "CL69_d28")
dds2@colData@listData$timepoint <- factor(dds2@colData@listData$timepoint, levels = my_levels)
genes <- c("CASP1", "CASP2", "CASP3", "CASP6", "CASP7", "CASP8", "CASP10", "BAX",
"BAK1", "BCL2", "BCL2L1", "BCL2L2", "MCL1", "BID", "BAD", "HRK", "BOK", "BCL2L11",
"BBC3", "PMAIP1", "BMF", "APAF1", "PARK2", "PARK7", "PPARGC1A", "CYCS", "BIRC2",
"BIRC5", "AREL1", "XIAP", "HTRA2", "SIVA1", "SIAH1", "USP11", "PRKCE", "ATF4",
"CEBPB", "SOD1", "SOD2", "XRCC2", "PARP2", "ATG7", "KPNA1", "KPNB1", "DEDD2")
ncounts1 <- dds2[dds2@rowRanges@elementMetadata@listData[["gene_symbol"]] %in% genes]
ncounts <- counts(ncounts1, normalize = T)
rownames(ncounts) <- ncounts1@rowRanges@elementMetadata@listData[["gene_symbol"]]
colnames(ncounts) <- c("NPC_1", "d5_1", "d25_1", "d45_1", "NPC_2", "NPC_3", "d5_2",
"d25_2", "d45_2", "d5_3", "d25_3", "d45_3", "CL28_d28_1", "CL28_d28_2", "CL68_d28_1",
"CL69_d28_1", "CL69_d28_2")
ncounts <- ncounts[, c(1, 5, 6, 2, 7, 10, 3, 8, 11, 4, 9, 12, 13, 14, 15, 16, 17)]
ncounts <- ncounts[match(genes, rownames(ncounts)), ]
my_zscale <- function(x) {
M <- rowMeans(x, na.rm = TRUE)
nsamples <- ncol(x)
DF <- nsamples - 1L
IsNA <- is.na(x)
if (any(IsNA)) {
mode(IsNA) <- "integer"
DF <- DF - rowSums(IsNA)
DF[DF == 0L] <- 1L
}
x <- x - M
V <- rowSums(x^2L, na.rm = TRUE)/DF
x <- x/sqrt(V + 0.01)
out <- melt(x)
out
}
ncounts_mean <- data.frame(NPCs = rowMeans(ncounts[, 1:3]), d5 = rowMeans(ncounts[,
4:6]), d25 = rowMeans(ncounts[, 7:9]), d45 = rowMeans(ncounts[, 10:12]), CL28_d28 = rowMeans(ncounts[,
13:14]), CL68_d28 = ncounts[, 15], CL69_d28 = rowMeans(ncounts[, 16:17]))
ncounts_mean <- as.matrix(ncounts_mean)
zcounts <- my_zscale(ncounts_mean)
lims <- c(min(zcounts$value), max(zcounts$value)) %>% max()
ggplot(data = zcounts, aes(x = Var2, y = Var1, fill = value)) + geom_tile() + scale_fill_viridis_c(option = "viridis",
limits = c(-lims, lims), breaks = c(-2, 0, 2)) + scale_y_discrete(limits = rev(levels(zcounts$Var1)),
position = "right") + geom_hline(yintercept = c(10.5, 15.5, 19.5, 23.5, 38.5),
col = "white", size = 2) + theme_minimal(base_size = 20) + labs(fill = "Z-Score",
x = "", y = "") + theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
axis.text.y = element_text(face = "italic"), legend.position = "bottom", legend.key.width = unit(2,
"cm"))
driver <- c("NCOA4", "ATF4", "VDAC2", "VDAC3", "POR", "LPCAT3", "MAP1LC3B", "TFRC",
"MAP1LC3A", "HMOX1", "YAP1", "EPAS1", "TF")
suppressor <- c("GSS", "PROM2", "AIFM2", "FTL", "GPX4", "SLC3A2", "NFE2L2", "SLC7A11",
"GCLC")
mat <- TPM[, 1:12]
gene_symbol <- rownames(mat)
names(gene_symbol) <- TPM$gene_name
genes <- driver
df <- t(mat[gene_symbol[genes], ])
colnames(df) <- c(genes)
df <- as.data.frame(df)
all_na <- function(x) any(!is.na(x))
df <- df %>% select_if(all_na)
GO_smooth_line_plot(df)
ncounts1 <- dds[dds@rowRanges@elementMetadata@listData[["gene_symbol"]] %in% genes]
ncounts <- counts(ncounts1, normalize = T)
rownames(ncounts) <- ncounts1@rowRanges@elementMetadata@listData[["gene_symbol"]]
ncounts <- ncounts[, 1:12]
colnames(ncounts) <- c("NPC_1", "d5_1", "d25_1", "d45_1", "NPC_2", "NPC_3", "d5_2",
"d25_2", "d45_2", "d5_3", "d25_3", "d45_3")
ncounts <- ncounts[, c(1, 5, 6, 2, 7, 10, 3, 8, 11, 4, 9, 12)]
ncounts <- ncounts[match(genes, rownames(ncounts)), ]
ncounts <- ncounts[match(genes, rownames(ncounts)), ]
NPC <- rowMeans(ncounts[, 1:3])
d5 <- rowMeans(ncounts[, 4:6])
d25 <- rowMeans(ncounts[, 7:9])
d45 <- rowMeans(ncounts[, 10:12])
av_ncounts <- cbind(NPC, d5, d25, d45)
zcounts <- my_zscale(av_ncounts)
ggplot(data = zcounts, aes(x = Var2, y = Var1, fill = value)) + geom_tile() + scale_fill_viridis_c(option = "viridis",
limits = c(-1.5, 1.5), breaks = c(-1.5, 0, 1.5)) + scale_y_discrete(limits = rev(levels(zcounts$Var1))) +
# geom_hline(yintercept = c(12.5,8.5), col='white', size=3)+
theme_minimal(base_size = 30) + labs(fill = "Z-Score", x = "", y = "") + theme(axis.text.x = element_text(angle = 0,
vjust = 0.5), axis.text.y = element_text(face = "italic"), legend.title = element_text(size = 24),
legend.text = element_text(size = 20)) + scale_x_discrete(position = "top")
genes <- suppressor
df <- t(mat[gene_symbol[genes], ])
colnames(df) <- c(genes)
df <- as.data.frame(df)
all_na <- function(x) any(!is.na(x))
df <- df %>% select_if(all_na)
GO_smooth_line_plot(df)
ncounts1 <- dds[dds@rowRanges@elementMetadata@listData[["gene_symbol"]] %in% genes]
ncounts <- counts(ncounts1, normalize = T)
rownames(ncounts) <- ncounts1@rowRanges@elementMetadata@listData[["gene_symbol"]]
ncounts <- ncounts[, 1:12]
colnames(ncounts) <- c("NPC_1", "d5_1", "d25_1", "d45_1", "NPC_2", "NPC_3", "d5_2",
"d25_2", "d45_2", "d5_3", "d25_3", "d45_3")
ncounts <- ncounts[, c(1, 5, 6, 2, 7, 10, 3, 8, 11, 4, 9, 12)]
ncounts <- ncounts[match(genes, rownames(ncounts)), ]
ncounts <- ncounts[match(genes, rownames(ncounts)), ]
NPC <- rowMeans(ncounts[, 1:3])
d5 <- rowMeans(ncounts[, 4:6])
d25 <- rowMeans(ncounts[, 7:9])
d45 <- rowMeans(ncounts[, 10:12])
av_ncounts <- cbind(NPC, d5, d25, d45)
zcounts <- my_zscale(av_ncounts)
ggplot(data = zcounts, aes(x = Var2, y = Var1, fill = value)) + geom_tile() + scale_fill_viridis_c(option = "viridis",
limits = c(-1.5, 1.5), breaks = c(-1.5, 0, 1.5)) + scale_y_discrete(limits = rev(levels(zcounts$Var1))) +
# geom_hline(yintercept = c(12.5,8.5), col='white', size=3)+
theme_minimal(base_size = 30) + labs(fill = "Z-Score", x = "", y = "") + theme(axis.text.x = element_text(angle = 0,
vjust = 0.5), axis.text.y = element_text(face = "italic"), legend.title = element_text(size = 24),
legend.text = element_text(size = 20)) + scale_x_discrete(position = "top")
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] reshape2_1.4.4 patchwork_1.1.0
## [3] clusterProfiler_3.16.1 TCseq_1.12.1
## [5] GO.db_3.11.4 viridis_0.5.1
## [7] viridisLite_0.3.0 pheatmap_1.0.12
## [9] DESeq2_1.28.1 SummarizedExperiment_1.18.2
## [11] DelayedArray_0.14.1 matrixStats_0.57.0
## [13] GenomicRanges_1.40.0 GenomeInfoDb_1.24.2
## [15] org.Hs.eg.db_3.11.4 AnnotationDbi_1.50.3
## [17] IRanges_2.22.2 S4Vectors_0.26.1
## [19] Biobase_2.48.0 BiocGenerics_0.34.0
## [21] forcats_0.5.0 stringr_1.4.0
## [23] dplyr_1.0.2 purrr_0.3.4
## [25] readr_1.4.0 tidyr_1.1.2
## [27] tibble_3.0.4 ggplot2_3.3.2
## [29] tidyverse_1.3.0
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.2.0 fastmatch_1.1-0
## [4] systemfonts_1.0.3 plyr_1.8.6 igraph_1.2.6
## [7] splines_4.0.3 BiocParallel_1.22.0 urltools_1.7.3
## [10] digest_0.6.27 htmltools_0.5.2 GOSemSim_2.14.2
## [13] magrittr_2.0.1 memoise_1.1.0 cluster_2.1.0
## [16] limma_3.44.3 Biostrings_2.56.0 annotate_1.66.0
## [19] graphlayouts_0.7.1 modelr_0.1.8 svglite_2.0.0
## [22] enrichplot_1.8.1 prettyunits_1.1.1 colorspace_2.0-0
## [25] blob_1.2.1 rvest_0.3.6 ggrepel_0.8.2
## [28] haven_2.3.1 xfun_0.31 crayon_1.3.4
## [31] RCurl_1.98-1.2 jsonlite_1.7.1 scatterpie_0.1.5
## [34] genefilter_1.70.0 survival_3.2-7 glue_1.6.2
## [37] polyclip_1.10-0 gtable_0.3.0 zlibbioc_1.34.0
## [40] XVector_0.28.0 scales_1.1.1 DOSE_3.14.0
## [43] DBI_1.1.0 edgeR_3.30.3 Rcpp_1.0.5
## [46] xtable_1.8-4 progress_1.2.2 gridGraphics_0.5-0
## [49] bit_4.0.4 europepmc_0.4 httr_1.4.2
## [52] fgsea_1.14.0 RColorBrewer_1.1-2 ellipsis_0.3.2
## [55] pkgconfig_2.0.3 XML_3.99-0.5 farver_2.0.3
## [58] sass_0.4.1 dbplyr_2.0.0 locfit_1.5-9.4
## [61] labeling_0.4.2 ggplotify_0.0.5 tidyselect_1.1.0
## [64] rlang_1.0.3 munsell_0.5.0 cellranger_1.1.0
## [67] tools_4.0.3 downloader_0.4 cli_3.3.0
## [70] generics_0.1.0 RSQLite_2.2.1 ggridges_0.5.2
## [73] broom_0.7.2 evaluate_0.14 fastmap_1.1.0
## [76] yaml_2.2.1 knitr_1.30 bit64_4.0.5
## [79] fs_1.5.0 tidygraph_1.2.0 ggraph_2.0.4
## [82] formatR_1.7 DO.db_2.9 xml2_1.3.2
## [85] compiler_4.0.3 rstudioapi_0.13 e1071_1.7-4
## [88] reprex_0.3.0 tweenr_1.0.1 geneplotter_1.66.0
## [91] bslib_0.3.1 stringi_1.5.3 highr_0.8
## [94] lattice_0.20-41 Matrix_1.2-18 vctrs_0.4.1
## [97] pillar_1.4.7 lifecycle_0.2.0 BiocManager_1.30.10
## [100] triebeard_0.3.0 jquerylib_0.1.4 data.table_1.13.2
## [103] cowplot_1.1.0 bitops_1.0-6 qvalue_2.20.0
## [106] R6_2.5.0 bookdown_0.21 gridExtra_2.3
## [109] MASS_7.3-53 assertthat_0.2.1 withr_2.3.0
## [112] GenomicAlignments_1.24.0 Rsamtools_2.4.0 GenomeInfoDbData_1.2.3
## [115] hms_0.5.3 grid_4.0.3 class_7.3-17
## [118] rvcheck_0.1.8 rmarkdown_2.14 ggforce_0.3.2
## [121] lubridate_1.7.9.2